Simple Text Recognition

Problem 1

Create a Windows Application using Wintempla called TextCreate to create the training set input and the training set target to recognize text.

Problem 2

Open the TextCreate.cpp file and open Wintempla. Once Wintempla is open, insert a button as shown.

Problem 3

Open the TextCreate.cpp file and open Wintempla. Once Wintempla is open, click anywhere to open the main window properties. In the Events tab, check the Paint event as shown below. Press OK to close the properties. Press OK to close Wintempla.

Problem 4

Edit the TextCreate.h file as shown.

TextCreate.h

#pragma once //______________________________________ TextCreate.h
#include "resource.h"

// Side length, in pixels, of the square cell reserved for each character.
// The same value is passed to CG::Font as the font size when the letters are drawn.
#define FONT_SIZE 12
// Main window of the TextCreate application. It draws the letters A to Z in several
// font families, reads the drawn pixels back as gray levels and, when the button is
// pressed, saves the training set input and target as CSV files.
class TextCreate: public Win::Window
{
public:
     TextCreate()
     {
     }
     ~TextCreate()
     {
     }
     // Gray levels read back from the window (one matrix row per pixel row); filled by Window_Paint.
     MATRIX screenBits;
     // Copies into bits the FONT_SIZE x FONT_SIZE cell of screenBits whose top-left corner is (x, y).
     void GetCharacterBits(int x, int y, MATRIX& bits);
     // Computes the mean of each of the FONT_SIZE rows of bits and stores it in the output array.
     // NOTE(review): the parameter is named sum here, but the definition in TextCreate.cpp
     // names it mean and stores row means (not sums) in it.
     void GetRowMean(const MATRIX& bits, valarray<double>& sum);
     // Draws the letters A to Z with the given font family on the text line at y,
     // placing one letter every FONT_SIZE pixels starting at x = 0.
     void DrawTextLine(CG::Gdi& gdi, int y, const wchar_t* fontname);
     // Returns the fixed string L"TEXTCREATE" (presumably the window class name used by Wintempla -- confirm).
     const wchar_t * GetClassName(){return L"TEXTCREATE";}
     ...
};

Problem 5

Edit the TextCreate.cpp file as shown.

TextCreate.cpp

#include "stdafx.h" //________________________________________ TextCreate.cpp
#include "TextCreate.h"

int APIENTRY wWinMain(HINSTANCE hInstance, HINSTANCE , LPTSTR cmdLine, int cmdShow)
{
     // Program entry point: create the main window and run its message loop
     // until it finishes; the value of the loop is the process exit code.
     TextCreate app;
     HBRUSH background = (HBRUSH)(COLOR_WINDOW+1); // Standard window background color
     app.CreateMainWindow(L"TextCreate", cmdShow, IDI_TEXTCREATE, NULL, background, hInstance);
     const int exitCode = app.MessageLoop(IDC_TEXTCREATE);
     return exitCode;
}

// Handler for the window Open event. Nothing is initialized here: screenBits is
// sized and filled in Window_Paint, after the text has been drawn.
void TextCreate::Window_Open(Win::Event& e)
{
}

// Paint handler: draws one line of the letters A to Z per font family and then
// reads the drawn area back, storing the gray level of every pixel in screenBits.
void TextCreate::Window_Paint(Win::Event& e)
{
     CG::Gdi gdi(hWnd, true, false);
     //_______________________________________ Draw one text line per font family
     const wchar_t* const fontFamilies[] = {L"Arial", L"Calibri", L"Courier New", L"Tahoma", L"Times New Roman"};
     const int numFonts = 5;
     for(int line = 0; line < numFonts; line++)
     {
          // Text lines are stacked vertically, FONT_SIZE pixels apart
          DrawTextLine(gdi, line*FONT_SIZE, fontFamilies[line]);
     }
     //_____________________________________ Extract the pixels from the text lines
     const int rows = numFonts*FONT_SIZE;
     const int cols = ('Z' - 'A' +1)*FONT_SIZE;
     // One matrix row per pixel row of the drawn area
     screenBits.resize(rows);
     for(int row = 0; row < rows; row++)
     {
          screenBits[row].resize(cols);
          for(int col = 0; col < cols; col++)
          {
               const COLORREF color = gdi.GetPixel_(col, row);
               // Gray level = integer average of the red, green and blue channels
               const int gray = (GetRValue(color) + GetGValue(color) + GetBValue(color))/3;
               screenBits[row][col] = gray;
          }
     }
}

// Draws the letters A to Z using the font family fontname on the text line at y.
// Each letter is drawn in its own cell: the n-th letter starts at x = n*FONT_SIZE.
void TextCreate::DrawTextLine(CG::Gdi& gdi, int y, const wchar_t* fontname)
{
     CG::Font font(fontname, FONT_SIZE);
     gdi.Select(font);
     // One-character, null-terminated buffer reused for every letter
     wchar_t letter[2] = {'A', '\0'};
     const int numLetters = 'Z' - 'A' + 1;
     for(int index = 0; index < numLetters; index++)
     {
          letter[0] = (wchar_t)('A' + index);
          gdi.TextOut(index*FONT_SIZE, y, letter);
     }
}

// Click handler of the Create button: builds the training set from the gray levels
// stored in screenBits, saves it to trainSetInput.csv and trainSetTarget.csv, and
// closes the window.
//   - Input: one row per character (font by font, A to Z), holding the mean gray
//     level of each of the FONT_SIZE pixel rows of the character cell.
//   - Target: one row per character with 1.0 in the column of its letter.
// screenBits must already have been filled by Window_Paint.
void TextCreate::btCreate_Click(Win::Event& e)
{
     const int numFonts = 5; // Arial, Calibri, Courier New, Tahoma, Times New Roman
     const int numClasses = 'Z' - 'A' +1;
     const int numCases = numFonts*numClasses;
     //__________________________ Create the matrix for the training set input and the matrix for the target
     MATRIX trainSetInput;
     MATRIX trainSetTarget;
     // NOTE(review): assumes CreateMatrix zero-fills the matrix, so every target entry
     // that is not set below stays 0 -- confirm against the Wintempla documentation.
     Math::Oper::CreateMatrix(trainSetTarget, numCases, numClasses);
     //__________________________ Scratch storage reused for every character
     valarray<double> mean(FONT_SIZE);
     MATRIX bits;
     Math::Oper::CreateMatrix(bits, FONT_SIZE, FONT_SIZE);

     int row = 0; // Index of the current training case
     for(int indexFont = 0; indexFont < numFonts; indexFont++) // For each font family
     {
          const int y = indexFont * FONT_SIZE; // Top of the text line drawn with this font
          for(int indexClass = 0; indexClass < numClasses; indexClass++, row++) // For each class: A, B, C, ..., Z
          {
               const int x = indexClass*FONT_SIZE; // Left edge of the cell of this letter
               //_____________________________________ Create the Training Set Input
               GetCharacterBits(x, y, bits);
               GetRowMean(bits, mean);
               trainSetInput.push_back(mean); // Insert the mean of each row for this character
               //_____________________________________ Create the Training Set Target
               // row == indexFont*numClasses + indexClass, so this case always belongs
               // to class indexClass; no extra test is needed.
               trainSetTarget[row][indexClass] = 1.0;
          }
     }
     Sys::FileAssistant::CsvSave(L"trainSetInput.csv", trainSetInput);
     Sys::FileAssistant::CsvSave(L"trainSetTarget.csv", trainSetTarget);
     this->Destroy();
}

// Stores in mean[r] the average of the FONT_SIZE values in row r of bits,
// for each of the first FONT_SIZE rows. mean must hold at least FONT_SIZE elements.
void TextCreate::GetRowMean(const MATRIX& bits, valarray<double>& mean)
{
     for(int row = 0; row < FONT_SIZE; row++)
     {
          double& rowMean = mean[row];
          rowMean = 0.0;
          // Accumulate the row, then divide by the number of columns
          for(int col = 0; col < FONT_SIZE; col++)
          {
               rowMean += bits[row][col];
          }
          rowMean /= FONT_SIZE;
     }
}

// Copies into bits the FONT_SIZE x FONT_SIZE cell of screenBits whose top-left
// corner is at column x, row y. bits must already be FONT_SIZE x FONT_SIZE.
void TextCreate::GetCharacterBits(int x, int y, MATRIX& bits)
{
     for(int row = 0; row < FONT_SIZE; row++)
     {
          const int screenRow = y + row;
          for(int col = 0; col < FONT_SIZE; col++)
          {
               bits[row][col] = screenBits[screenRow][x + col];
          }
     }
}

Problem 6

Press the Create Training Set button. This will create the files trainSetInput.csv and trainSetTarget.csv. These files will be placed inside the TextCreate project folder.

Problem 7

Check the trainSetInput.csv file to verify that the information is correct. Each cell in the file is the mean of the gray level of each row in the character.

Problem 8

Check the trainSetTarget.csv file to verify that the information is correct. The first row (training case) is the letter A, the second row is the letter B, and so on until the letter Z. Then the same characters are repeated for each of the other font families.

Problem 9

Create a Neural Project called See to train an ANN to classify the letters from A to Z.

Problem 10

Edit the CheckTraining.lab file to check the training: (a) Compute the confusion matrix using the training set. (b) Plot the error for each network output. (c) Save the confusion matrix as a vector image (trainConf.emf).

Problem 11

Modify the TextCreate project to contaminate the characters with random pixels. Open the TextCreate.cpp file and modify the Window_Paint function as shown. Do not forget to change the filenames so that the program now creates the validSetInput.csv and validSetTarget.csv files.

TextCreate.cpp

...
// Paint handler (validation-set version): draws one line of the letters A to Z per
// font family, contaminates the window with randomly colored pixels, and then reads
// the drawn area back, storing the gray level of every pixel in screenBits.
void TextCreate::Window_Paint(Win::Event& e)
{
     CG::Gdi gdi(hWnd, true, false);
     int y = 0;
     //_______________________________________ Draw the text lines
     DrawTextLine(gdi, y, L"Arial");
     y+= FONT_SIZE;
     DrawTextLine(gdi, y, L"Calibri");
     y+= FONT_SIZE;
     DrawTextLine(gdi, y, L"Courier New");
     y+= FONT_SIZE;
     DrawTextLine(gdi, y, L"Tahoma");
     y+= FONT_SIZE;
     DrawTextLine(gdi, y, L"Times New Roman");

     //_____________________________________ Create some random pixels
     int x;
     int red, green, blue;
     const int numPixels = (int)(0.1*width*height); // Contaminate 10 percent of the pixels
     for(int i = 0; i < numPixels; i++)
     {
          // The loop only runs when numPixels > 0, which implies that width and height
          // are both nonzero, so the modulo operations below are safe.
          // Using % keeps the coordinates in 0..width-1 and 0..height-1; scaling by
          // rand()/RAND_MAX could produce width or height itself, one past the last pixel.
          x = rand() % width;
          y = rand() % height;
          // Each channel takes any value in 0..255; with rand()/RAND_MAX scaling the
          // value 255 was produced only when rand() returned exactly RAND_MAX.
          red = rand() % 256;
          green = rand() % 256;
          blue = rand() % 256;
          gdi.SetPixel(x, y, RGB(red, green, blue));
     }
     //_____________________________________ Extract the pixels from the text lines
     const int rows = 5*FONT_SIZE;
     const int cols = ('Z' - 'A' +1)*FONT_SIZE;
     // Create the matrix to store the screen bits
     screenBits.resize(rows);
     COLORREF pixel;
     int grayLevel;
     for(y = 0; y < rows; y++)
     {
          screenBits[y].resize(cols);
     }
     // Get the bits from the screen and store them in the screenBits matrix
     for(y = 0; y < rows; y++)
     {
          for(x = 0; x <cols; x++)
          {
               pixel = gdi.GetPixel_(x, y);
               red = GetRValue(pixel);
               green = GetGValue(pixel);
               blue = GetBValue(pixel);
               // Gray level = integer average of the three color channels
               grayLevel = (red+green+blue)/3;
               screenBits[y][x] = grayLevel;
          }
     }
}

Problem 12

Edit the Validation.lab file to perform the validation of the ANN. (a) Compute the confusion matrix using the validation set. (b) Plot the error for each network output. (c) Save the confusion matrix as a vector image (validConf.emf).

Problem 13

How could you improve the performance of the classification?

Simple Text Recognition

© Copyright 2000-2021 Wintempla selo. All Rights Reserved. Jul 22 2021. Home